{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['predict the news category']\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "sns.set_style('darkgrid')\n",
    "import os\n",
    "print(os.listdir(\"../input\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
    "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
   },
   "outputs": [],
   "source": [
    "from fastai.text import *\n",
    "from fastai.imports import *\n",
    "from fastai.text import *\n",
    "from fastai import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[PosixPath('../input/predict the news category/Sample_submission.xlsx'),\n",
       " PosixPath('../input/predict the news category/Data_Train.xlsx'),\n",
       " PosixPath('../input/predict the news category/Data_Test.xlsx')]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Folder that holds the competition spreadsheets (train, test, sample submission).\n",
    "path = Path('../input/predict the news category/')\n",
    "# Show the folder contents as this cell's output.\n",
    "path.ls()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n",
      "/opt/conda/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/opt/conda/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/opt/conda/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/opt/conda/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/opt/conda/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/opt/conda/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
     ]
    }
   ],
   "source": [
    "# Labelled training stories: STORY text plus its SECTION label.\n",
    "train = pd.read_excel(path/'Data_Train.xlsx')\n",
    "# Stories to predict: STORY text only, no label column.\n",
    "test = pd.read_excel(path/'Data_Test.xlsx')\n",
    "# Sample submission shipped with the competition data.\n",
    "sub = pd.read_excel(path/'Sample_submission.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((7628, 2), (2748, 1), (2748, 1))"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape, test.shape, sub.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>STORY</th>\n",
       "      <th>SECTION</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>But the most painful was the huge reversal in ...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>How formidable is the opposition alliance amon...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               STORY  SECTION\n",
       "0  But the most painful was the huge reversal in ...        3\n",
       "1  How formidable is the opposition alliance amon...        0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>STORY</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2019 will see gadgets like gaming smartphones ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>It has also unleashed a wave of changes in the...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               STORY\n",
       "0  2019 will see gadgets like gaming smartphones ...\n",
       "1  It has also unleashed a wave of changes in the..."
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f589b533358>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEKCAYAAAAFJbKyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEytJREFUeJzt3X+QXWV9x/H3sgRCTDQwrBBDFKbG7wiooEygpa1YFYHRgj9gRCsRqfgHSHGwHaCOMKAtVUAzlDJFiRAH+TGImjqZItJRWjsowjBFwK+TIsKaFIIh/DD8SJbbP85Zucbdu/dJ7t2zP96vmZ2997nPuee7Z3bzyXmec5470Gq1kCSpWzs1XYAkaXoxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFdm56QL64cUXX2yNjHhHvCSVmDNn8HFgaKJ+MzI4RkZabNq0uekyJGlaGRpa8Ktu+jlUJUkqYnBIkooYHJKkIgaHJKmIwSFJKmJwSJKKGBySpCIGhySpiMEhSSoyI+8c1+Tb4xVzGNxlbtNlNG7khefY+OSWpsuQ+srgUE8M7jKXhy94Q9NlNO7Vn70XMDg0szlUJUkqYnBIkooYHJKkIgaHJKmIwSFJKmJwSJKKGBySpCIGhySpiMEhSSpicEiSihgckqQiBockqYjBIUkqYnBIkor0bVn1iFgCrAL2Bl4ErszMFRFxPvBxYEPd9dzMXFNvcw5wCjACnJGZt9TtRwErgEHgq5l5Ub/qliR11s/P49gKnJWZd0fEAuCuiLi1fu1LmXlxe+eI2B/4IHAA8Crg+xHxuvrly4F3AsPAnRGxOjPv72PtkqRx9C04MnM9sL5+/HREPAAs7rDJscD1mfk88MuIWAssq19bm5kPAkTE9XVfg0OSGjApcxwRsS9wMPDjuun0iPifiFgZEbvXbYuBR9o2G67bxmuXJDWg7x8dGxHzgW8CZ2bmUxFxBXAh0Kq/XwJ8DBgYY/MWY4dbq9M+BwcHWLhw3g7VLW0vf/c00/U1OCJiDlVoXJuZNwNk5qNtr38F+G79dBhY0rb5PsC6+vF47WMaGWmxadPmHSteRYaGFjRdwpTh756mq27/jvt5VdUAcBXwQGZe2ta+qJ7/AHgv8LP68WrgGxFxKdXk+FLgJ1RnIksjYj/g11QT6B/qV92SpM76ecZxOPAR4N6IuKduOxc4MSIOohpuegj4BEBm3hcRN1JNem8FTsvMEYCIOB24hepy3JWZeV8f65YkdTDQanWcLpiWtmwZaTlcMLmGhhbw8AVvaLqMxr36s/eyYcPTTZchbZehoQV3AYdM1M87xyVJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JUxOCQJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JUxOCQJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUV27tcbR8QSYBWwN/AicGVmroiIPYAbgH2Bh4ATMvOJiBgAVgDHAJuBj2bm3fV7LQc+U7/15zLzmn7VLUnqrJ9nHFuBszLz9cBhwGkRsT9wNnBbZi4FbqufAxwNLK2/TgWuAKiD5jzgUGAZcF5E7N7HuiVJHfQtODJz/egZQ2Y+DTwALAaOBUbPGK4BjqsfHwusysxWZt4BLIyIRcC7gFszc2NmPgHcChzVr7olSZ31baiqXUTsCxwM/BjYKzPXQxUuEfHKutti4JG2zYbrtvHaxzU4OMDChfN6U7xUyN89zXR9D46ImA98EzgzM5+KiPG6DozR1urQPq6RkRabNm0uqlM7ZmhoQdMlTBn+7mm66vbvuK9XVUXEHKrQuDYzb66bH62HoKi/P1a3DwNL2jbfB1jXoV2S1IC+BUd9ldRVwAOZeWnbS6uB5fXj
5cB32tpPioiBiDgMeLIe0roFODIidq8nxY+s2yRJDejnUNXhwEeAeyPinrrtXOAi4MaIOAV4GDi+fm0N1aW4a6kuxz0ZIDM3RsSFwJ11vwsyc2Mf65YkdTDQanWcLpiWtmwZaTnOPLmGhhbw8AVvaLqMxr36s/eyYcPTTZchbZehoQV3AYdM1G9SrqqS1L35r5jDbrvMbbqMKeHZF57jmSe3NF2GtmFwSFPMbrvM5fDLDm+6jCnhR5/8Ec9gcEw1rlUlSSpicEiSihgckqQiBockqYjBIUkqYnBIkooYHJKkIgaHJKmIwSFJKmJwSJKKGBySpCIGhySpiMEhSSpicEiSihgckqQiBockqcis/SCn+S+fy267zmm6jCnh2ee38MxTzzVdhqRpYtYGx267zuEtf7uq6TKmhLu+eBLPYHBI6o5DVZKkIgaHJKmIwSFJKtJVcETEbd20SZJmvo6T4xExF5gH7BkRuwMD9UsvB17V59okSVPQRFdVfQI4kyok7uKl4HgKuLyPdUmSpqiOwZGZK4AVEfHJzLxskmqSJE1hXd3HkZmXRcSfAPu2b5OZ3gghSbNMV8EREV8H/gi4Bxipm1vAuMERESuBdwOPZeaBddv5wMeBDXW3czNzTf3aOcAp9fufkZm31O1HASuAQeCrmXlRwc8nSeqxbu8cPwTYPzNbBe99NfDP/GG4fCkzL25viIj9gQ8CB1DNp3w/Il5Xv3w58E5gGLgzIlZn5v0FdUiSeqjb+zh+Buxd8saZeTuwscvuxwLXZ+bzmflLYC2wrP5am5kPZuYLwPV1X0lSQ7o949gTuD8ifgI8P9qYmX+5Hfs8PSJOAn4KnJWZTwCLgTva+gzXbQCPbNN+6HbsU5LUI90Gx/k92t8VwIVU8yMXApcAH+Oly3zbtRj7jGjC4bLBwQEWLpy3A2XOPh6v3vFY9pbHc+rp9qqqH/ZiZ5n56OjjiPgK8N366TCwpK3rPsC6+vF47eMaGWmxadPmjn2GhhZ0UfHsMdHxmojH8yUey97a0eOp7nX7u9ftVVVP89L/9HcB5gC/zcyXlxQVEYsyc3399L1UcycAq4FvRMSlVJPjS4GfUJ2JLI2I/YBfU02gf6hkn5Kk3ur2jOP3YigijqOauB5XRFwHHEG1XMkwcB5wREQcRBVCD1HdmU5m3hcRNwL3A1uB0zJzpH6f04FbqC7HXZmZ93X7w0mSem+7PsgpM78dEWdP0OfEMZqv6tD/88Dnx2hfA6wpLlKS1BfdDlW9r+3pTlT3dZTc0yFJmiG6PeN4T9vjrVTDTN5PIUmzULdzHCf3uxBJ0vTQ7VDVPsBlwOFUQ1T/BfxNZg73sTZJ0hTU7ZIjX6O6ZPZVVHd0/1vdJkmaZbqd4xjKzPaguDoizuxHQZKkqa3b4Hg8Iv4KuK5+fiLwm/6UJEmayrodqvoYcALwf8B64AOAE+aSNAt1e8ZxIbC8XsmWiNgDuJgqUCRJs0i3ZxxvHA0NgMzcCBzcn5IkSVNZt8GxU0TsPvqkPuPYruVKJEnTW7f/+F8C/HdE3ER1H8cJjLGulCRp5uvqjCMzVwHvBx4FNgDvy8yv97MwSdLU1PVwU2beT7XsuSRpFnOeQtKMtvv8Oey829ymy5gStj77HE88s2WH38fgkDSj7bzbXH74529tuowp4a23/xB6EBzdXlUlSRJgcEiSChkckqQiBockqYjBIUkqYnBIkooYHJKkIgaHJKmIwSFJKmJwSJKKGBySpCIGhySpSN8WOYyIlcC7gccy88C6bQ/gBmBf4CHghMx8IiIGgBXAMcBm4KOZeXe9zXLgM/Xbfi4zr+lXzZKkifXzjONq4Kht2s4GbsvMpcBt9XOAo4Gl9depwBXwu6A5DzgUWAac1/4RtpKkyde34MjM24GN2zQfC4yeMVwDHNfWviozW5l5B7AwIhYB7wJuzcyNmfkEcCt/GEaSpEk02XMce2XmeoD6+yvr9sXAI239huu28dolSQ2Z
Kh/kNDBGW6tDe0eDgwMsXDhvh4uaTTxeveOx7C2PZ2/14nhOdnA8GhGLMnN9PRT1WN0+DCxp67cPsK5uP2Kb9h9MtJORkRabNm3u2GdoaEH3Vc8CEx2viXg8X+Kx7C2PZ291Op7dHqvJHqpaDSyvHy8HvtPWflJEDETEYcCT9VDWLcCREbF7PSl+ZN0mSWpIPy/HvY7qbGHPiBimujrqIuDGiDgFeBg4vu6+hupS3LVUl+OeDJCZGyPiQuDOut8FmbnthLskaRL1LTgy88RxXnr7GH1bwGnjvM9KYGUPS5Mk7QDvHJckFTE4JElFDA5JUhGDQ5JUxOCQJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JUxOCQJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JUxOCQJBXZuYmdRsRDwNPACLA1Mw+JiD2AG4B9gYeAEzLziYgYAFYAxwCbgY9m5t0NlC1Jotkzjrdl5kGZeUj9/GzgtsxcCtxWPwc4Glhaf50KXDHplUqSfmcqDVUdC1xTP74GOK6tfVVmtjLzDmBhRCxqokBJUkNDVUAL+F5EtIB/zcwrgb0ycz1AZq6PiFfWfRcDj7RtO1y3rR/vzQcHB1i4cF5/Kp+hPF6947HsLY9nb/XieDYVHIdn5ro6HG6NiJ936DswRlur05uPjLTYtGlzxwKGhhZMXOUsMtHxmojH8yUey97yePZWp+PZ7bFqZKgqM9fV3x8DvgUsAx4dHYKqvz9Wdx8GlrRtvg+wbvKqlSS1m/TgiIiXRcSC0cfAkcDPgNXA8rrbcuA79ePVwEkRMRARhwFPjg5pSZImXxNDVXsB34qI0f1/IzP/PSLuBG6MiFOAh4Hj6/5rqC7FXUt1Oe7Jk1+yJGnUpAdHZj4IvGmM9t8Abx+jvQWcNgmlSZK6MJUux5UkTQMGhySpiMEhSSpicEiSihgckqQiBockqYjBIUkqYnBIkooYHJKkIgaHJKmIwSFJKmJwSJKKGBySpCIGhySpiMEhSSpicEiSihgckqQiBockqYjBIUkqYnBIkooYHJKkIgaHJKmIwSFJKmJwSJKKGBySpCIGhySpiMEhSSpicEiSiuzcdAHdioijgBXAIPDVzLyo4ZIkaVaaFmccETEIXA4cDewPnBgR+zdblSTNTtMiOIBlwNrMfDAzXwCuB45tuCZJmpWmS3AsBh5pez5ct0mSJtlAq9VquoYJRcTxwLsy86/r5x8BlmXmJ8fZZAPwq8mqT5JmiNcAQxN1mi6T48PAkrbn+wDrOvSf8AeXJG2f6RIcdwJLI2I/4NfAB4EPNVuSJM1O02KOIzO3AqcDtwAPADdm5n3NViVJs9O0mOOQJE0d0+KMQ5I0dRgckqQi02VyfMZxCZXeiYiVwLuBxzLzwKbrmc4iYgmwCtgbeBG4MjNXNFvV9BURc4HbgV2p/r29KTPPa7aqHecZRwNcQqXnrgaOarqIGWIrcFZmvh44DDjN380d8jzwF5n5JuAg4KiIOKzhmnaYwdEMl1Dpocy8HdjYdB0zQWauz8y768dPU13F6CoN2ykzW5n5TP10Tv017a9IcqiqGWMtoXJoQ7VIY4qIfYGDgR83XMq0Vo8w3AW8Frg8M6f98fSMoxkDY7RN+/+FaOaIiPnAN4EzM/OppuuZzjJzJDMPolrxYllETPt5OIOjGaVLqEiTJiLmUIXGtZl5c9P1zBSZuQn4ATNgPs7gaMbvllCJiF2ollBZ3XBNEhExAFwFPJCZlzZdz3QXEUMRsbB+vBvwDuDnzVa147xzvCERcQzwZarLcVdm5ucbLmnaiojrgCOAPYFHgfMy86pGi5qmIuJPgf8E7qW6HBfg3Mxc01xV01dEvBG4hurvfCeq5ZIuaLaqHWdwSJKKOFQlSSpicEiSihgckqQiBockqYjBIUkq
4pIj0gQi4u+pPqp4hOoS1U8A/wQsAp6tu63NzA/U/U8C/o5qhYABYCWwH3A4sEv9OOvtPke1su93M/Om+r6eLwDvqfd1P3BaZg7X790CLs3Ms+rnnwbmZ+b5/fr5pW15xiF1EBF/TPUP+5sz841UN3CNrjP24cw8qP4aDY2jgTOBIzPzAODNwJOZeVq97MQxwP+2bXfTNrv8B2AB8LrMXAp8G7i5vjEPqtVW3xcRe/bvp5Y6MzikzhYBj2fm8wCZ+Xhmdloe5hzg06N9MvO5zPxKNzuKiHnAycCnMnOk3v5r1Etz1922AlcCn9qeH0bqBYND6ux7wJKI+EVE/EtEvLXttWsj4p7664t124FUK6Fuj9cCD4+xqOBPgQPanl8OfDgiXrGd+5F2iHMcUgeZ+UxEvAX4M+BtwA0RcXb98ocz86c93N0AY6+S/HvtmflURKwCzuClORZp0njGIU2gXhb7B/VHfp4OvL9D9/uAt2znrtYCr4mIBdu0v5lqkrzdl4FTgJdt576k7WZwSB1EZWlb00HArzps8o/AFyJi73r7XSPijG72lZm/pVoQ79L6w39Gr9CaB/zHNn03AjdShYc0qRyqkjqbD1xWL429leqs4FTgJqo5jtGhoscz8x2ZuSYi9gK+X18J1aK6HLdb5wAXA7+IiBepluB+b2aONYR1CdUZkDSpXB1XklTEoSpJUhGDQ5JUxOCQJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUX+H0VDIfCUoMuZAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Class-balance check: number of training rows for each SECTION label.\n",
    "sns.countplot(x='SECTION', data=train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def random_seed(seed_value):\n",
    "    \"\"\"Seed Python's `random`, NumPy and PyTorch with `seed_value`.\n",
    "\n",
    "    When CUDA is available the CUDA generators are seeded too, cuDNN is put\n",
    "    into deterministic mode and its benchmark auto-tuner is switched off.\n",
    "    \"\"\"\n",
    "    import random\n",
    "    import numpy as np\n",
    "    import torch\n",
    "\n",
    "    random.seed(seed_value)\n",
    "    np.random.seed(seed_value)\n",
    "    torch.manual_seed(seed_value)\n",
    "\n",
    "    # Nothing GPU-related to configure on a CPU-only machine.\n",
    "    if not torch.cuda.is_available():\n",
    "        return\n",
    "\n",
    "    torch.cuda.manual_seed(seed_value)\n",
    "    torch.cuda.manual_seed_all(seed_value)\n",
    "    cudnn = torch.backends.cudnn\n",
    "    cudnn.deterministic = True\n",
    "    cudnn.benchmark = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>epoch</th>\n",
       "      <th>train_loss</th>\n",
       "      <th>valid_loss</th>\n",
       "      <th>accuracy</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>3.884861</td>\n",
       "      <td>3.618055</td>\n",
       "      <td>0.341282</td>\n",
       "      <td>00:33</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>epoch</th>\n",
       "      <th>train_loss</th>\n",
       "      <th>valid_loss</th>\n",
       "      <th>accuracy</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>3.383326</td>\n",
       "      <td>3.510152</td>\n",
       "      <td>0.353401</td>\n",
       "      <td>00:40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>3.297758</td>\n",
       "      <td>3.433312</td>\n",
       "      <td>0.364023</td>\n",
       "      <td>00:40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>3.114028</td>\n",
       "      <td>3.382402</td>\n",
       "      <td>0.373136</td>\n",
       "      <td>00:39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>2.867497</td>\n",
       "      <td>3.353268</td>\n",
       "      <td>0.377482</td>\n",
       "      <td>00:40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>2.580076</td>\n",
       "      <td>3.357384</td>\n",
       "      <td>0.382314</td>\n",
       "      <td>00:40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>2.304793</td>\n",
       "      <td>3.380360</td>\n",
       "      <td>0.384887</td>\n",
       "      <td>00:40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>2.100055</td>\n",
       "      <td>3.420664</td>\n",
       "      <td>0.385018</td>\n",
       "      <td>00:40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>1.930216</td>\n",
       "      <td>3.454225</td>\n",
       "      <td>0.384191</td>\n",
       "      <td>00:40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>1.888747</td>\n",
       "      <td>3.466512</td>\n",
       "      <td>0.384073</td>\n",
       "      <td>00:40</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>epoch</th>\n",
       "      <th>train_loss</th>\n",
       "      <th>valid_loss</th>\n",
       "      <th>accuracy</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.155208</td>\n",
       "      <td>0.100051</td>\n",
       "      <td>0.974460</td>\n",
       "      <td>00:15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>epoch</th>\n",
       "      <th>train_loss</th>\n",
       "      <th>valid_loss</th>\n",
       "      <th>accuracy</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.137248</td>\n",
       "      <td>0.090150</td>\n",
       "      <td>0.968566</td>\n",
       "      <td>00:18</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "        <style>\n",
       "            /* Turns off some styling */\n",
       "            progress {\n",
       "                /* gets rid of default border in Firefox and Opera. */\n",
       "                border: none;\n",
       "                /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
       "                background-size: auto;\n",
       "            }\n",
       "            .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
       "                background: #F44336;\n",
       "            }\n",
       "        </style>\n",
       "      <progress value='0' class='' max='1', style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      0.00% [0/1 00:00<00:00]\n",
       "    </div>\n",
       "    \n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>epoch</th>\n",
       "      <th>train_loss</th>\n",
       "      <th>valid_loss</th>\n",
       "      <th>accuracy</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table><p>\n",
       "\n",
       "    <div>\n",
       "        <style>\n",
       "            /* Turns off some styling */\n",
       "            progress {\n",
       "                /* gets rid of default border in Firefox and Opera. */\n",
       "                border: none;\n",
       "                /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
       "                background-size: auto;\n",
       "            }\n",
       "            .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
       "                background: #F44336;\n",
       "            }\n",
       "        </style>\n",
       "      <progress value='169' class='' max='222', style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      76.13% [169/222 00:22<00:07 0.0859]\n",
       "    </div>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.model_selection import KFold, RepeatedKFold\n",
    "\n",
    "# One entry per fold: the predicted class index for every row of `test`.\n",
    "# Re-created on each run of this cell so re-running never appends to stale results.\n",
    "y_pred_totcb = []\n",
    "fold = KFold(n_splits=15, shuffle=True, random_state=0)\n",
    "\n",
    "# enumerate() supplies the 1-based fold number used in the progress message.\n",
    "for i, (train_index, test_index) in enumerate(fold.split(train), start=1):\n",
    "\n",
    "    train_df = train.iloc[train_index]\n",
    "    valid_df = train.iloc[test_index]\n",
    "\n",
    "    # Re-seed at the start of every fold so each fold begins from the same RNG state.\n",
    "    random_seed(10)\n",
    "\n",
    "    data_lm = TextLMDataBunch.from_df(Path(path), train_df, valid_df, test, text_cols=[0], bs=32)\n",
    "    # Share the language-model vocabulary with the classifier data so its token ids\n",
    "    # index the same embedding rows as the encoder restored by load_encoder() below.\n",
    "    data_clas = TextClasDataBunch.from_df(Path(path), train_df, valid_df, test, vocab=data_lm.train_ds.vocab, text_cols=[0], label_cols=1, bs=32)\n",
    "\n",
    "    # Stage 1: train the AWD_LSTM language model on this fold's text and save its encoder.\n",
    "    learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.4, model_dir='/tmp/model/')\n",
    "    learn.fit_one_cycle(1, 1e-2, moms=(0.8, 0.7))\n",
    "    learn.unfreeze()\n",
    "    learn.fit_one_cycle(9, 1e-3, moms=(0.8,0.7))\n",
    "    learn.save_encoder('model_enc')\n",
    "\n",
    "    # Stage 2: classifier on top of the saved encoder, unfreezing more layer groups\n",
    "    # at each step (frozen -> last 2 -> last 3 -> all) with discriminative learning rates.\n",
    "    learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.4, model_dir='/tmp/model/')\n",
    "    learn.load_encoder('model_enc')\n",
    "    learn.fit_one_cycle(1, 1e-2, moms=(0.8, 0.7))\n",
    "    learn.freeze_to(-2)\n",
    "    learn.fit_one_cycle(1, slice(1e-2/(2.6**4),1e-2), moms=(0.8,0.7))\n",
    "    learn.freeze_to(-3)\n",
    "    learn.fit_one_cycle(1, slice(5e-3/(2.6**4),5e-3), moms=(0.8,0.7))\n",
    "    learn.unfreeze()\n",
    "    learn.fit_one_cycle(5, slice(1e-3/(2.6**4),1e-3), moms=(0.8,0.7))\n",
    "\n",
    "    # Score the test set (ordered=True asks for the original row order) and keep\n",
    "    # only the highest-scoring class per row for this fold.\n",
    "    log_preds, test_labels = learn.get_preds(ds_type=DatasetType.Test, ordered=True)\n",
    "    preds = np.argmax(log_preds, 1)\n",
    "    y_pred_totcb.append(preds)\n",
    "    print(f'fold {i} completed')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Submission"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One column per fold (SECTION_<fold index>), one row per test story.\n",
    "# Iterating over the collected predictions instead of a hard-coded range(15)\n",
    "# keeps this cell in step with however many folds were actually run.\n",
    "df = pd.DataFrame()\n",
    "for i, fold_preds in enumerate(y_pred_totcb):\n",
    "    df[f'SECTION_{i}'] = fold_preds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2748, 15)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SECTION_0</th>\n",
       "      <th>SECTION_1</th>\n",
       "      <th>SECTION_2</th>\n",
       "      <th>SECTION_3</th>\n",
       "      <th>SECTION_4</th>\n",
       "      <th>SECTION_5</th>\n",
       "      <th>SECTION_6</th>\n",
       "      <th>SECTION_7</th>\n",
       "      <th>SECTION_8</th>\n",
       "      <th>SECTION_9</th>\n",
       "      <th>SECTION_10</th>\n",
       "      <th>SECTION_11</th>\n",
       "      <th>SECTION_12</th>\n",
       "      <th>SECTION_13</th>\n",
       "      <th>SECTION_14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2743</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2744</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2745</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2746</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2747</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      SECTION_0  SECTION_1  SECTION_2  SECTION_3  SECTION_4  SECTION_5  \\\n",
       "2743          1          1          1          1          1          1   \n",
       "2744          1          1          1          1          1          1   \n",
       "2745          1          1          1          1          1          1   \n",
       "2746          0          0          3          3          0          0   \n",
       "2747          1          1          1          1          1          1   \n",
       "\n",
       "      SECTION_6  SECTION_7  SECTION_8  SECTION_9  SECTION_10  SECTION_11  \\\n",
       "2743          1          1          1          1           1           1   \n",
       "2744          1          1          1          1           1           1   \n",
       "2745          1          1          1          1           1           1   \n",
       "2746          0          0          0          0           0           0   \n",
       "2747          1          1          1          1           1           1   \n",
       "\n",
       "      SECTION_12  SECTION_13  SECTION_14  \n",
       "2743           1           1           1  \n",
       "2744           1           1           1  \n",
       "2745           1           1           1  \n",
       "2746           3           0           0  \n",
       "2747           1           1           1  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the last five rows of the SECTION_* frame to eyeball the per-column labels.\n",
    "df.tail(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SECTION</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2743</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2744</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2745</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2746</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2747</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      SECTION\n",
       "2743        1\n",
       "2744        1\n",
       "2745        1\n",
       "2746        0\n",
       "2747        1"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Majority vote across the SECTION_* columns of `df`: column 0 of the row-wise mode\n",
    "# holds the most frequent label of each row (on a tie, the first mode pandas returns).\n",
    "# A tie in any row pads the mode frame with NaN and upcasts it to float, so cast the\n",
    "# voted column back to int64 to keep the submitted labels integral (1, not 1.0).\n",
    "sub = pd.DataFrame({'SECTION': df.mode(axis=1)[0].astype('int64')})\n",
    "sub.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    1183\n",
       "2     826\n",
       "0     417\n",
       "3     322\n",
       "Name: SECTION, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Class balance of the voted labels: number of rows per SECTION value.\n",
    "sub.loc[:, 'SECTION'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<a download=\"submission.csv\" href=\"data:text/csv;base64,U0VDVElPTgoxCjIKMQoxCjEKMQoxCjIKMQoyCjAKMwoyCjEKMgoxCjMKMgozCjIKMgoyCjIKMAowCjIKMgozCjMKMAoxCjMKMgowCjIKMgoyCjIKMAoxCjAKMQozCjIKMgoyCjEKMQowCjEKMwoyCjEKMgoyCjAKMQoxCjAKMQoxCjIKMwoyCjEKMQoyCjAKMAoxCjEKMQoyCjAKMAoxCjEKMQoyCjIKMwowCjMKMAoyCjIKMgoyCjEKMQoxCjIKMQoxCjEKMQowCjIKMgoyCjEKMAoxCjMKMQoxCjIKMAoyCjMKMQoxCjIKMgoxCjIKMwozCjIKMQoxCjMKMAoyCjAKMwozCjMKMgoxCjEKMgoxCjAKMwoxCjEKMQoxCjIKMQowCjIKMQoyCjEKMgoyCjIKMQoyCjEKMQoxCjIKMQoxCjEKMQoyCjIKMQowCjAKMQoyCjAKMQoxCjIKMgoyCjIKMQoxCjEKMgoxCjIKMQoxCjMKMQoyCjIKMQozCjEKMQoyCjIKMQoyCjEKMQoxCjIKMQozCjEKMgoxCjEKMQoyCjEKMQoxCjMKMgozCjIKMgozCjEKMQoyCjEKMQoyCjAKMQowCjEKMwoxCjEKMQoxCjAKMAoxCjEKMwoxCjIKMgowCjIKMQoyCjIKMQoyCjEKMgoyCjEKMQozCjEKMQoyCjIKMQoxCjMKMAoxCjIKMgoyCjAKMgoyCjAKMQoxCjIKMQowCjIKMQoxCjIKMQozCjEKMQoxCjEKMQoyCjEKMQowCjMKMQoyCjIKMAoxCjIKMgoxCjEKMQoxCjIKMQoxCjIKMQoyCjEKMgoxCjEKMgowCjEKMAoxCjEKMAoyCjMKMQozCjEKMQoyCjEKMwowCjIKMAoyCjEKMgoxCjIKMwoxCjAKMAoyCjAKMQoyCjEKMQowCjMKMQoyCjIKMwoyCjEKMgoxCjEKMQoyCjIKMQoxCjMKMgowCjMKMQoxCjIKMgozCjIKMQoxCjEKMQoyCjEKMAowCjIKMgoxCjEKMgoxCjEKMgoyCjIKMgowCjEKMQoxCjIKMAozCjMKMAoxCjEKMgozCjIKMQoxCjIKMQoyCjIKMgoyCjEKMQozCjIKMwozCjIKMQoyCjEKMQowCjEKMAozCjAKMgoxCjEKMQoxCjAKMQoxCjEKMAoxCjAKMgoyCjIKMgoyCjEKMQoxCjEKMQozCjAKMAoyCjIKMgoxCjEKMgoyCjEKMgowCjIKMQoxCjEKMgoyCjEKMQozCjMKMQozCjEKMgowCjEKMwowCjEKMQozCjMKMwowCjMKMQoxCjEKMQoxCjEKMAoyCjIKMQowCjEKMQozCjMKMQoxCjMKMgoxCjEKMgozCjEKMAoyCjEKMAoyCjEKMQozCjAKMgoxCjEKMQoxCjIKMQoyCjEKMQoyCjEKMgoxCjIKMgowCjEKMQozCjEKMQoxCjIKMgozCjIKMgoxCjEKMgoyCjEKMQoxCjEKMgowCjMKMQoxCjEKMQoxCjMKMQoxCjEKMQoxCjIKMQoxCjMKMAoxCjEKMgoyCjAKMQozCjEKMgoxCjIKMgoxCjIKMAoxCjEKMgoxCjIKMQoxCjAKMQozCjEKMgoxCjIKMgoyCjIKMgoxCjEKMQoxCjIKMQoxCjIKMwozCjIKMQowCjEKMAoxCjIKMgoxCjEKMgoxCjEKMQoyCjMKMgoxCjEKMQoxCjIKMQowCjEKMwoxCjEKMQowCjEKMQoxCjAKMgoxCjAKMwoxCjEKMQozCjEKMgoyCjEKMgowCjEKMQoxCjMKMQoxCjMKMAoyCjAKMwowCjEKMQoxCjIKMgowCjEKMgoyCjEKMQoxCjEKMgozCjMKMgoyCjEKMgoyCjEKMAoxCjEKMQozCjEKMgoxCjEKMQoxCjEKMgoyCjIKMQoxCjIKMQozCjAKMgowC
jAKMAoyCjIKMgoyCjAKMgoyCjMKMAoyCjEKMQozCjEKMQoyCjIKMwoxCjAKMQoxCjEKMQoyCjEKMgozCjMKMQowCjEKMAoxCjEKMgoyCjEKMgoxCjIKMQoxCjIKMgoyCjIKMAoyCjAKMQoyCjEKMQozCjIKMQoxCjIKMQoxCjIKMQoxCjEKMQoxCjIKMwoxCjAKMQoxCjEKMwoyCjMKMQozCjEKMAowCjEKMQoyCjEKMQoyCjAKMAozCjIKMQowCjEKMgoyCjIKMAoyCjEKMAozCjEKMgoyCjIKMAozCjAKMAowCjIKMgowCjEKMgozCjEKMgoyCjEKMQoyCjEKMwoxCjEKMQoyCjIKMQoxCjAKMgoyCjEKMAoyCjEKMQoyCjEKMQoxCjEKMQowCjAKMwowCjEKMAoxCjEKMQoxCjIKMgoxCjIKMgoyCjEKMAoyCjIKMgoyCjMKMgoxCjAKMQoxCjEKMgoxCjIKMQowCjMKMgozCjIKMQoyCjIKMQoxCjEKMQowCjEKMgozCjEKMAowCjEKMAoxCjEKMQozCjEKMQoxCjIKMQoxCjIKMQoxCjIKMQoxCjAKMAowCjEKMQoxCjIKMAoyCjMKMgowCjMKMQowCjMKMgoyCjAKMwoxCjAKMwozCjMKMAozCjAKMgoyCjAKMgozCjEKMQoxCjIKMQozCjIKMAozCjAKMQoyCjIKMQozCjIKMAoyCjAKMwoxCjMKMgoxCjEKMgoxCjAKMwoyCjEKMgowCjIKMQoyCjIKMQoxCjEKMwowCjEKMgoxCjIKMgoxCjIKMAoxCjIKMgozCjEKMQoxCjEKMAoxCjIKMAoyCjMKMgoxCjEKMQozCjMKMQoxCjIKMAoxCjMKMQoxCjEKMQowCjMKMQoxCjEKMgoxCjEKMgozCjEKMgoyCjEKMAoxCjEKMAowCjEKMwoxCjEKMwoxCjIKMgoxCjIKMgoxCjAKMQoxCjMKMAoxCjIKMwoyCjAKMAoxCjAKMwoxCjIKMQozCjEKMgoyCjIKMQoxCjAKMwowCjEKMQoyCjEKMQoyCjIKMwowCjAKMQoxCjEKMQoxCjEKMgoxCjEKMQoyCjAKMwoyCjEKMQoxCjIKMgoyCjEKMAowCjEKMgoyCjEKMQowCjIKMgowCjMKMwoyCjMKMQozCjIKMgoxCjEKMQoxCjIKMQoxCjEKMQoxCjIKMgoyCjIKMAowCjEKMgowCjIKMwowCjIKMQoxCjEKMQoxCjIKMwowCjEKMgoxCjIKMgozCjEKMgoyCjIKMgoyCjEKMgoyCjMKMQoxCjIKMAoxCjAKMQoyCjIKMgoyCjMKMwoxCjIKMQoxCjIKMQozCjIKMQoyCjAKMQoxCjEKMwoxCjEKMQowCjAKMgoxCjAKMgoxCjEKMQoxCjAKMgoxCjEKMwowCjEKMQoxCjMKMQoyCjAKMQoxCjIKMwowCjIKMQoxCjIKMgoxCjEKMQoxCjMKMQoyCjEKMAoxCjIKMQowCjEKMAoxCjEKMgozCjEKMgoxCjEKMQoyCjEKMwoxCjIKMQoxCjIKMgoyCjEKMQoxCjEKMQoxCjIKMgoxCjEKMgoyCjAKMgoxCjEKMQowCjEKMgoxCjEKMQoyCjAKMQoyCjIKMwowCjIKMgoyCjAKMQoyCjIKMgoyCjEKMQowCjEKMgowCjEKMwoxCjAKMAowCjAKMwoxCjEKMQozCjEKMAoxCjEKMAoxCjIKMQoxCjAKMwoxCjIKMQowCjEKMgowCjEKMgoxCjMKMwoyCjEKMQoyCjEKMgozCjIKMQowCjMKMgoxCjAKMQoxCjIKMQoyCjEKMAoyCjIKMAoyCjMKMAoyCjIKMQozCjIKMAozCjMKMAoyCjMKMwoyCjAKMQoyCjAKMwoxCjAKMgoyCjAKMgoxCjIKMQoyCjIKMAoyCjEKMAowCjEKMQoxCjEKMQoxCjEKMQoyCjEKMQoxCjAKMQoxCjAKMgoyCjEKMgoxCjEKMgoyC
jIKMwoyCjIKMQoxCjIKMQoxCjAKMQoyCjIKMAozCjIKMAoyCjIKMwoxCjAKMwowCjIKMQoxCjAKMAoxCjIKMQowCjIKMAoyCjEKMwozCjIKMgoyCjEKMQoxCjEKMQoxCjEKMQoxCjIKMQoxCjMKMAowCjEKMQoxCjIKMQoxCjEKMQoyCjIKMQoxCjEKMQoxCjIKMwoxCjIKMQozCjIKMgoxCjMKMQoxCjIKMgoyCjAKMAoyCjAKMgoxCjEKMgoxCjIKMQoxCjEKMgoxCjIKMwowCjMKMQozCjIKMQoyCjEKMgoxCjEKMQozCjEKMgoxCjEKMAowCjAKMQoxCjEKMgowCjEKMwoyCjIKMQowCjIKMgowCjAKMgoyCjIKMwoxCjEKMAoxCjMKMgoyCjIKMAowCjEKMgoyCjEKMgowCjEKMwoyCjEKMgoxCjEKMQoyCjEKMQoyCjIKMgowCjEKMQoyCjIKMQoxCjEKMQoxCjIKMgowCjEKMQowCjEKMgoyCjIKMAoyCjEKMQozCjAKMgoyCjMKMQowCjIKMQowCjIKMAoyCjEKMgozCjEKMQoxCjEKMQoxCjEKMAoxCjEKMwozCjEKMgowCjAKMgoyCjEKMQoyCjIKMQoxCjEKMgoxCjMKMQowCjIKMQoyCjEKMgoxCjIKMgowCjEKMgozCjIKMQoxCjAKMQowCjIKMwoyCjEKMAozCjEKMgoxCjEKMQoyCjMKMQozCjEKMQoxCjEKMAoxCjEKMgowCjAKMQoxCjIKMQoyCjEKMwoxCjEKMQoxCjIKMgoyCjEKMQoyCjEKMgowCjIKMQowCjEKMQoyCjEKMQoyCjMKMgoyCjIKMgoxCjAKMgoxCjEKMQowCjAKMgoxCjEKMAoyCjMKMQozCjIKMQowCjIKMwowCjEKMwoxCjAKMgoxCjIKMgoxCjEKMAoxCjEKMQoyCjIKMQoxCjIKMgozCjIKMQowCjIKMwoxCjEKMQowCjAKMQoxCjIKMAozCjEKMgowCjIKMAoxCjIKMQowCjEKMgoyCjIKMwoxCjIKMAoxCjMKMAoyCjIKMwoxCjIKMQoyCjIKMAozCjEKMQoyCjMKMwoxCjMKMgozCjIKMgoyCjEKMQoyCjEKMAoyCjIKMQoxCjMKMQozCjIKMAoyCjEKMwoxCjAKMQowCjEKMgoxCjIKMgoxCjAKMQozCjEKMQozCjEKMAoxCjEKMAowCjAKMAoyCjIKMQoyCjMKMAoxCjEKMgoyCjIKMQoyCjEKMQoxCjIKMwozCjEKMQoyCjIKMQowCjEKMgoxCjEKMQowCjEKMgowCjAKMwoyCjIKMQoxCjEKMAoxCjEKMAoxCjIKMgoxCjEKMQoxCjAKMgoyCjMKMQoyCjIKMwoyCjMKMwoyCjEKMQoxCjEKMQoyCjIKMQowCjAKMAoyCjAKMQoyCjAKMwoyCjMKMQoyCjMKMgoxCjIKMQoxCjMKMwoyCjEKMgoxCjAKMgoxCjIKMgozCjEKMQoxCjAKMgowCjIKMQoyCjEKMgoxCjEKMgoyCjEKMQoyCjIKMgozCjEKMQoyCjAKMgozCjEKMQoxCjMKMQoyCjEKMQoyCjEKMgowCjIKMgoxCjIKMgoyCjAKMAowCjAKMQozCjEKMgoxCjIKMgoyCjIKMgowCjIKMQoyCjMKMQoxCjMKMAowCjEKMQoyCjEKMgoxCjEKMQoyCjEKMgozCjIKMgozCjEKMgozCjEKMgoxCjIKMQoxCjEKMQoyCjIKMQoxCjAKMQozCjEKMgoyCjAKMgoyCjAKMgoxCjMKMQoxCjMKMQowCjEKMgoxCjAKMwoyCjEKMQozCjEKMQozCjAKMgoxCjMKMwowCjMKMQowCjEKMgowCjMKMAowCjMKMQoyCjIKMQoyCjAKMQowCjEKMQoxCjIKMgowCjEKMgoyCjAKMAoxCjEKMQowCjEKMAozCjMKMAoxCjIKMAowCjEKMAoyCjIKMAowCjIKMQowC
jMKMQoxCjEKMwoxCjEKMQoyCjMKMQozCjEKMQozCjEKMAoxCjEKMAoxCjIKMQowCjIKMQoyCjIKMAoxCjEKMQowCjIKMAoxCjIKMgoyCjEKMAoyCjAKMQoyCjIKMAoxCjEKMgowCjAKMQoyCjEKMQowCjEKMgowCjEKMQowCjEKMQozCjAKMgoyCjIKMgoxCjIKMwoxCjEKMgoxCjMKMgoxCjEKMQowCjEKMQoxCjEKMgoxCjIKMQoxCjEKMQowCjEKMQowCjEKMwoyCjAKMAozCjEKMgowCjEKMwoyCjIKMgoxCjIKMwoxCjIKMgoxCjIKMgozCjIKMQoxCjAKMQozCjEKMgozCjEKMQoxCjAKMAoxCjIKMQowCjAKMgoyCjAKMQozCjEKMAoxCjEKMQoxCjMKMQoyCjMKMgoxCjEKMQoyCjAKMQoxCjEKMQoxCjEKMgozCjIKMQoyCjIKMgowCjIKMAoyCjIKMgoyCjMKMQoyCjEKMQowCjEKMgozCjIKMgowCjEKMgoxCjMKMAoxCjEKMQoyCjIKMgoxCjEKMgowCjEKMAoxCjIKMQoxCjAKMAowCjEKMgoyCjEKMQoxCjAKMQowCjEKMQoyCjEKMAoyCjEKMQozCjAKMwoyCjEKMgoxCjEKMQozCjEKMgozCjMKMQozCjIKMQoxCjEKMQoxCjEKMgowCjEKMAoxCjIKMgoxCjMKMgowCjEKMQoyCjMKMQoyCjEKMwoyCjIKMQoxCjEKMwozCjEKMgoxCjEKMgoxCjMKMgoyCjEKMwozCjIKMgoyCjEKMQoxCjAKMgowCjIKMQoxCjIKMwoxCjIKMQoxCjIKMgoyCjMKMgoyCjAKMAozCjEKMQowCjEKMgoxCjEKMQoyCjEKMgoxCjIKMQoxCjEKMwoyCjIKMAoxCjAKMQoxCjIKMwowCjEKMgoxCjEKMwoxCjEKMQowCjAKMgoxCjEKMQoxCjIKMQoxCjEKMQoyCjIKMAoyCjEKMwoyCjEKMQoxCjIKMQoyCjEKMwoxCjEKMQoxCjAKMgowCjIKMgoxCjIKMwoyCjIKMAoyCjEKMQoyCjAKMwoyCjMKMwowCjIKMgoyCjEKMwozCjEKMQoxCjIKMAoxCjMKMgowCjAKMgozCjEKMgoxCjEKMQoxCjEKMgoyCjEKMgozCjEKMwoxCjEKMgoxCjEKMgoyCjEKMQoxCjEKMgoxCjEKMwoxCjAKMgoxCjEKMwowCjAKMwoxCjIKMQoyCjEKMgoxCjAKMgowCjEKMQozCjEKMgoyCjEKMQozCjEKMQoxCjAKMwoxCjEKMwoxCjEKMgowCjIKMwoxCjAKMQoyCjIKMQoyCjEKMQozCjEKMwoxCjAKMwowCjEKMQoxCjMKMAowCjEKMQoxCjAKMQo=\" target=\"_blank\">Download CSV file</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from IPython.display import HTML\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import base64\n",
    "\n",
    "def create_download_link(df, title = \"Download CSV file\", filename = \"submission.csv\"):\n",
    "    \"\"\"Return an IPython ``HTML`` anchor that downloads ``df`` as a CSV file.\n",
    "\n",
    "    The output of ``df.to_csv(index=False)`` is base64-encoded and embedded in a\n",
    "    ``data:text/csv`` URI, so the link carries the data itself and no file is written.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : object exposing ``to_csv(index=False)`` (a pandas DataFrame in this notebook)\n",
    "        Data to export; the index is not included in the CSV.\n",
    "    title : str\n",
    "        Visible link text.\n",
    "    filename : str\n",
    "        File name placed in the anchor's ``download`` attribute.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    IPython.display.HTML\n",
    "        Wrapper around the generated ``<a>`` element.\n",
    "    \"\"\"\n",
    "    from html import escape  # function-scope import keeps this cell self-contained\n",
    "\n",
    "    payload = base64.b64encode(df.to_csv(index=False).encode()).decode()\n",
    "    # Escape the caller-supplied strings so quotes or angle brackets in `title` or\n",
    "    # `filename` cannot break the attribute or inject markup. The base64 payload only\n",
    "    # contains characters that are safe inside an attribute value.\n",
    "    template = '<a download=\"{filename}\" href=\"data:text/csv;base64,{payload}\" target=\"_blank\">{title}</a>'\n",
    "    markup = template.format(payload=payload, title=escape(str(title)), filename=escape(str(filename)))\n",
    "    return HTML(markup)\n",
    "\n",
    "# Last expression of the cell, so the returned HTML link is rendered as the cell output.\n",
    "create_download_link(df=sub)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
